# Purpose:
# Replicate the exploratory Welch t-test comparing novice and therapist
# requests to re-listen to the recorded response during think-aloud.

library(readxl)
library(dplyr)
library(stringr)

# ---- Configuration ----
# `input_file` is a placeholder: point it at the workbook before running.
input_file <- "data/change to data file name"
sheet_name <- 1
output_file <- "outputs/07_relisten_group_comparison.txt"

# ---- Load the workbook ----
df <- read_excel(path = input_file, sheet = sheet_name)

# ---- Check that every column used below is present ----
required_cols <- c("קבוצה", "Unnamed: 1", "יזום")
is_present <- required_cols %in% names(df)
missing_cols <- required_cols[!is_present]
if (length(missing_cols) > 0) {
  stop("Missing required columns: ", paste(missing_cols, collapse = ", "))
}

# Build one row per participant holding the total number of re-listen
# requests, labelled with a harmonised group name ("novice" / "therapist").
recognised_groups <- c("novice", "therapist")

df_labelled <- df %>%
  transmute(
    group_raw = `קבוצה`,
    trial_id = `Unnamed: 1`,
    # Cells that are empty (or not coercible to a number) count as zero.
    relisten_count = coalesce(as.numeric(`יזום`), 0)
  ) %>%
  filter(!is.na(group_raw), !is.na(trial_id)) %>%
  mutate(
    # Strip a trailing "-<digits>" suffix so rows collapse onto one id.
    participant_id = str_replace(as.character(trial_id), "-\\d+$", ""),
    # Match labels on a trimmed, lower-cased key so values such as
    # " novice" or "Therapist" are not silently excluded.
    group_key = str_to_lower(str_trim(as.character(group_raw))),
    group = case_when(
      group_key %in% c("סטודנט", "novice", "student") ~ "novice",
      group_key %in% c("מטפל", "therapist", "expert") ~ "therapist",
      TRUE ~ as.character(group_raw)
    )
  )

# Make exclusions visible instead of dropping unknown labels silently.
unrecognised_groups <- setdiff(unique(df_labelled$group), recognised_groups)
if (length(unrecognised_groups) > 0) {
  warning(
    "Dropping rows with unrecognised group labels: ",
    paste(unrecognised_groups, collapse = ", "),
    call. = FALSE
  )
}

df_summary <- df_labelled %>%
  filter(group %in% recognised_groups) %>%
  group_by(participant_id, group) %>%
  summarise(
    relisten_total = sum(relisten_count, na.rm = TRUE),
    .groups = "drop"
  )

# Welch two-sample t-test on per-participant totals; unequal variances is
# already the t.test() default and is spelled out here for the reader.
t_result <- t.test(relisten_total ~ group, data = df_summary, var.equal = FALSE)

# Per-group vectors of participant totals, extracted once and reused.
novice_totals <- df_summary$relisten_total[df_summary$group == "novice"]
therapist_totals <- df_summary$relisten_total[df_summary$group == "therapist"]

# Descriptive statistics for each group.
mean_nov <- mean(novice_totals)
sd_nov <- sd(novice_totals)
n_nov <- length(novice_totals)
mean_th <- mean(therapist_totals)
sd_th <- sd(therapist_totals)
n_th <- length(therapist_totals)

# Cohen's d (novice minus therapist) using the pooled standard deviation.
ss_nov <- (n_nov - 1) * sd_nov^2
ss_th <- (n_th - 1) * sd_th^2
pooled_sd <- sqrt((ss_nov + ss_th) / (n_nov + n_th - 2))
cohen_d <- (mean_nov - mean_th) / pooled_sd

# Write the text report. The output directory is created when absent so that
# sink() can open the file, and the sink is closed in `finally` so console
# output is restored even if one of the cat()/print() calls fails.
dir.create(dirname(output_file), recursive = TRUE, showWarnings = FALSE)

sink(output_file)
invisible(tryCatch(
  {
    cat("Re-listen group comparison\n\n")
    cat(sprintf("Novice: M = %.3f, SD = %.3f, n = %d\n", mean_nov, sd_nov, n_nov))
    cat(sprintf("Therapist: M = %.3f, SD = %.3f, n = %d\n\n", mean_th, sd_th, n_th))
    print(t_result)
    cat("\nCohen's d:\n")
    print(cohen_d)
    cat("\nNote: with the uploaded relisten dataset, the means/SDs/t/p match the manuscript, but the reported Welch df in the manuscript does not match exactly.\n")
  },
  finally = sink()
))
